# Model download URL: http://modelscope.cn/models/sogagaga/GLM-4.5V-w4a8

# Start `vllm serve` for the W4A8-quantized GLM-4.5V checkpoint.
#
# Optional environment overrides (the defaults reproduce the original,
# hard-coded command line):
#   GLM_MODEL_PATH  model directory handed to `vllm serve`
#                   (default: /data/ZhipuAI/GLM-4.5V-w4a8)
#   GLM_TP_SIZE     value passed to -tp (default: 4)
#   GLM_PORT        value passed to --port (default: 12345)
#
# If GLM_MODEL_PATH or GLM_PORT is overridden, update the "model" field and
# the URL of any client request accordingly.

# NOTE(review): presumably selects the W4A8 variant of the W8A8 MoE path;
# confirm against the vLLM build this script targets.
export VLLM_W8A8_MOE_USE_W4A8=1

glm_model_path="${GLM_MODEL_PATH:-/data/ZhipuAI/GLM-4.5V-w4a8}"
glm_tp_size="${GLM_TP_SIZE:-4}"
glm_port="${GLM_PORT:-12345}"

# The compilation config is passed as a single-quoted JSON string so the shell
# leaves the embedded double quotes untouched.
vllm serve "${glm_model_path}" \
  -tp "${glm_tp_size}" \
  --port "${glm_port}" \
  --compilation-config '{"cudagraph_mode": "FULL_DECODE_ONLY", "level": 0}'

# Example request (uncomment and run once the server is up):
# curl -X POST http://0.0.0.0:12345/v1/chat/completions -H "Content-Type: application/json" -d '{
#     "model": "/data/ZhipuAI/GLM-4.5V-w4a8",
#     "messages": [
#       {
#         "role": "user",
#         "content": [
#           {
#             "type": "image_url",
#             "image_url": {
#               "url": "https://cdn.bigmodel.cn/static/logo/register.png"
#             }
#           },
#           {
#             "type": "image_url",
#             "image_url": {
#               "url": "https://cdn.bigmodel.cn/static/logo/api-key.png"
#             }
#           },
#           {
#             "type": "text",
#             "text": "这两张图是甚么意思？在讲一件甚么事情，有什么作用？"
#           }
#         ]
#       }
#     ],
#     "thinking": {
#       "type": "enabled"
#     }
#   }'